library(SmarterPoland)
# Download the Eurostat table "cens_01news" (2001 census).
cen <- getEurostatRCV(kod = "cens_01news")
# Keep only the rows whose country code is one of the 27 codes listed here.
cen.g <- subset(
  cen,
  geo %in% c(
    "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "EL",
    "ES", "FI", "FR", "HU", "IE", "IT", "LT", "LU", "LV",
    "MT", "NL", "PL", "PT", "RO", "SE", "SI", "SK", "UK"
  )
)
# Recode the countries to match: "UK" becomes "GB" and "EL" becomes "GR".
# The previous call used `recode()` with patterns that carried a trailing
# space ("UK ", "EL "), while the country filter above accepts the codes
# without one, so the patterns could not match the rows that were kept.
# `recode()` is also not supplied by any package this file loads.
# NOTE(review): the old call looks like car::recode syntax - confirm no
# other behaviour of that function was being relied on.
# The substitution below is base R only and matches the code with or without
# trailing whitespace, preserving whatever padding the value had.
geo.is.factor <- is.factor(cen.g$geo)
geo.codes <- as.character(cen.g$geo)
geo.codes <- sub("^UK([[:space:]]*)$", "GB\\1", geo.codes)
geo.codes <- sub("^EL([[:space:]]*)$", "GR\\1", geo.codes)
# Hand the column back in the same type it arrived in.
cen.g$geo <- if (geo.is.factor) factor(geo.codes) else geo.codes
### Filter to get the totals out of the table
# Sex: drop the "T" rows.
cen.gs <- cen.g[which(cen.g$sex != "T"), ]
# Education: keep only the listed isced97 codes.
cen.gse <- cen.gs[which(cen.gs$isced97 %in%
  c("ED0", "ED1", "ED2", "ED3", "ED4", "ED5_6", "NED", "UNK")), ]
# Work status: keep only the listed wstatus codes.
cen.gsew <- cen.gse[which(cen.gse$wstatus %in%
  c("EMP", "INAC", "UNE", "NAP", "UNK")), ]
# Age: drop the "TOTAL" rows.
cen.gsewa <- cen.gsew[which(cen.gsewa.age <- cen.gsew$age != "TOTAL"), ]
# Discard factor levels that no longer occur after filtering.
p <- droplevels(cen.gsewa)
# Fold the values of one category into another.
#
# For every row whose `category` equals `from` and whose `value` is not NA,
# the value is added to each row whose `category` equals `to` and whose `key`
# is identical. Rows without a matching partner are left untouched.
#
# value    - numeric vector of counts
# category - vector (same length) holding the category codes to compare
# key      - character vector (same length) identifying rows that belong
#            together apart from `category`
# from, to - category code to take values from / add values to
#
# Returns the updated `value` vector. A target that is NA stays NA (NA + x is
# NA), exactly as in the row-by-row loops this function replaces.
fold.category <- function(value, category, key, from, to) {
  src <- which(category == from & !is.na(value))
  dst <- which(category == to)
  if (length(src) == 0L || length(dst) == 0L) {
    return(value)
  }
  # One total per key, so each target needs a single lookup instead of a scan
  # over the whole table.
  add <- rowsum(value[src], key[src])
  hit <- match(key[dst], rownames(add))
  found <- !is.na(hit)
  value[dst[found]] <- value[dst[found]] + as.vector(add[hit[found], 1L])
  value
}

# Get a unique identifier: work status, age, sex and country (everything
# except the education level).
p$full <- paste(p$wstatus, p$age, p$sex, p$geo, sep = "_")
# Combine ED0 and ED1: ED0 values are added onto the matching ED1 rows.
p$value <- fold.category(p$value, p$isced97, p$full, from = "ED0", to = "ED1")
# New unique identifier: education, age, sex and country (everything except
# the work status).
p$full1 <- paste(p$isced97, p$age, p$sex, p$geo, sep = "_")
# Combine NAP and INAC: NAP values are added onto the matching INAC rows.
p$value <- fold.category(p$value, p$wstatus, p$full1, from = "NAP", to = "INAC")
# Combine UNK and INAC: UNK values are added onto the matching INAC rows.
p$value <- fold.category(p$value, p$wstatus, p$full1, from = "UNK", to = "INAC")
# Remove the no longer necessary lines: work status "UNK" and "NAP" and
# education level "ED0". A row is kept only when all three comparisons are
# TRUE.
p <- p[which(with(p, wstatus != "UNK" & wstatus != "NAP" & isced97 != "ED0")), ]
# Discard the factor levels that just became unused.
p <- droplevels(p)
# Calculate the population share: each row's value divided by the sum of all
# non-missing values of the same country. Rows with a missing value get a
# missing share.
# `u` (the distinct country codes) is kept because the check further down
# iterates over it.
u <- unique(p$geo)
# ave() returns, for every row, the total of its own country. This replaces a
# loop that rescanned the whole table once per country. Grouping is done on
# the character codes so unused factor levels cannot interfere.
geo.total <- ave(p$value, as.character(p$geo),
                 FUN = function(v) sum(v, na.rm = TRUE))
p$share <- p$value / geo.total
# Check: for every country print its code, then the sum of its shares
# (missing shares are ignored in the sum).
for (i in u) {
  print(i)
  p.s <- p[which(p$geo == i), ]
  print(sum(p.s$share, na.rm = TRUE))
}
# Get only the needed columns. They are selected by name instead of by
# position (previously 1, 2, 3, 4, 7 and 11), so a different column layout in
# the downloaded table raises an error rather than silently picking the wrong
# column.
# NOTE(review): the position-to-name mapping is inferred from the sort keys
# and the rename below - confirm against the actual table layout.
popul <- p[, c("wstatus", "isced97", "age", "sex", "geo", "share")]
# Order by country, age, sex, education and work status.
popul <- popul[order(popul$geo, popul$age, popul$sex,
                     popul$isced97, popul$wstatus), ]
# Rename for ease.
colnames(popul) <- c("work", "edu", "age", "sex", "geoID", "share")
# Strip surrounding whitespace from the country codes and store them as a
# factor again. The previous call used `strstrip()`, which is not a base R
# function and is not defined anywhere in this file; base `trimws()` is used
# instead.
# NOTE(review): assumes strstrip() only removed whitespace - confirm.
popul$geoID <- as.factor(trimws(as.character(popul$geoID)))
# Summary of the final table (shown when run interactively).
summary(popul)
# Save the data.
# NOTE(review): the output path is hard-coded to drive E: - adjust per machine.
write.table(popul, 'E:/popul.txt')